# Packages used in these slides. One call per line: when several calls share
# a line, everything after the first `#` is a comment and never runs.
library(MASS)     # Datasets
library(mice)     # Boys dataset
library(dplyr)    # Data manipulation
library(magrittr) # Pipes
library(ggplot2)  # Plotting suite
library(sf)       # Spatial features
New functions:
XXXXX
- plot(): R’s plotting device
- barplot(): bar plot function
- pie(): pie plot function
- hist(): histogram
- density(): function that calculates the density
- boxplot(): box plot function

XXXXX
Source: Anscombe, F. J. (1973). “Graphs in Statistical Analysis”. American Statistician. 27 (1): 17–21.
Source: https://www.autodeskresearch.com/publications/samestats
- base graphics in R
- ggplot2 graphics

base graphics in R

height <- c(50.1, 53.5, 50.0, 54.5, 57.5)
weight <- c(3.65, 3.37, 3.14, 4.27, 5.03)
# Assign the `boys` dataset to a variable of the same name in the current
# environment, then show its first rows.
boys <- boys
head(boys)
##      age  hgt   wgt   bmi   hc  gen  phb tv   reg
## 3  0.035 50.1 3.650 14.54 33.7 <NA> <NA> NA south
## 4  0.038 53.5 3.370 11.77 35.0 <NA> <NA> NA south
## 18 0.057 50.0 3.140 12.56 35.2 <NA> <NA> NA south
## 23 0.060 54.5 4.270 14.37 36.7 <NA> <NA> NA south
## 28 0.062 57.5 5.030 15.21 37.3 <NA> <NA> NA south
## 36 0.068 55.5 4.655 15.11 37.0 <NA> <NA> NA south
To subset a vector use square brackets v[1:5]
To call a variable in the data frame, use the $ notation:
# `$` extracts one column of the data frame by name.
boys$hgt
# Square brackets then subset that column: here, its first ten values.
boys$hgt[1:10]
## [1] 50.1 53.5 50.0 54.5 57.5 55.5 52.5 53.0 55.1 54.5
# Scatter plot of weight against height with explicit title and axis labels.
plot(
  x = boys$hgt, y = boys$wgt,
  main = "Scatter plot", xlab = "Height", ylab = "Weight",
  bty = "L"
)
Breaking this down:
# The scatter plot call again, one argument per line.
plot( # Calling the base plot function
x = boys$hgt, # x coordinates
y = boys$wgt, # y coordinates
main = "Scatter plot", # Plot title
xlab = "Height", # x-axis label
ylab = "Weight", # y-axis label
bty = "L" # Box type around the plot: "L" gives a box shaped like an L
)
See ?par for more graphical parameters
Two ways of doing the same thing
# Two ways to draw the same scatter plot:
# explicit x and y vectors ...
plot(x = boys$hgt, y = boys$wgt)
# ... or a formula (y ~ x) evaluated inside the data frame.
plot(wgt ~ hgt, data = boys)
The latter is the plot method for formulas. (Formulas are an R class!)
# Line chart of exp(x) for x = 1, ..., 5.
plot(
  x = 1:5,
  y = exp(1:5),
  type = "l",
  main = "Line chart",
  bty = "L"
)
Beyond basic scatter/line plots, there are specific functions:
# Tabulate the region variable and show the counts as a bar chart.
counts <- table(boys$reg)
barplot(counts, main = "Bar chart", ylab = "N")

# The same tabulation shown as a pie chart.
counts <- table(boys$reg)
pie(x = counts, main = "Pie chart")
Let’s go one step back:
# Tabulate region, then print the resulting table.
counts <- table(boys$reg)
counts
## 
## north  east  west south  city 
##    81   161   239   191    73
# Distribution of height as a histogram.
hist(boys$hgt, main = "Histogram", xlab = "Height")

# Height split by region, using the formula interface with `data =`.
boxplot(
  hgt ~ reg,
  data = boys,
  main = "Box plot", xlab = "Region", ylab = "Height"
)
- plot(): Generic function for plotting of R objects
- barplot(): bar plots
- pie(): pie charts
- hist(): histograms
- boxplot(): box plot function
- par(): Set or query graphical parameters
- density(): function that calculates the density

ggplot

What is ggplot2?
Layered plotting based on the book The Grammar of Graphics by Leland Wilkinson.
With ggplot2 you provide the data, map variables to aesthetics, and state which geometric object to display.
ggplot2 then takes care of the details.
1: Provide the data
# Step 1: data only; no aesthetics or layers are specified yet.
ggplot(data = boys)
2: map variable to aesthetics
# Step 2: height goes on the x-axis, weight on the y-axis.
ggplot(
  data = boys,
  aes(x = hgt, y = wgt)
)
3: state which geometric object to display
# Step 3: add a point layer to display the mapped variables.
ggplot(
  data = boys,
  aes(x = hgt, y = wgt)
) +
  geom_point()
Create the plot
# Base plot: weight against height, drawn as dark-green points.
gg <- ggplot(data = boys, aes(x = hgt, y = wgt)) +
  geom_point(col = "dark green")
Add another layer (smooth fit line)
# Stack a dark-blue smoothed fit line on top of the existing plot object.
gg <- gg +
  geom_smooth(col = "dark blue")
Give it some labels and a nice look
# Label the axes after the variables actually mapped (x = hgt, y = wgt);
# the earlier "Age" / "BMI" labels did not match the plotted data.
gg <- gg +
  labs(x = "Height", y = "Weight", title = "Trend for boys") +
  theme_minimal()
plot(gg)
# Weight against height for rows with a known region. Point shape encodes
# region, point colour encodes age, and points are drawn half-transparent.
gg <- boys %>%
  filter(!is.na(reg)) %>%
  ggplot(aes(x = hgt, y = wgt, shape = reg, colour = age)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Trend for boys",
    x = "Height", y = "Weight",
    shape = "Region", colour = "Age"
  ) +
  theme_minimal()
plot(gg)
geom_point
geom_bar
geom_line
geom_smooth
geom_histogram
geom_boxplot
geom_density
facet_wrap() and facet_grid() divide figures into panels.
# BMI against age with a smoothed fit, split into one panel per region.
boys %>%
  ggplot(aes(x = age, y = bmi)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~ reg)
Easy with ggsave()
# `myplot` stands for any ggplot object you want to write to disk.
# save as pdf
ggsave("plot.pdf", myplot)
# save as png and specify dimensions (in inches)
ggsave("plot.png", myplot, width = 7, height = 5, units = "in")
The sf package stores spatial features in data.frames.
We have time for a cursory introduction at most.
# Read the `DK_map.shp` shapefile and draw its geometry only.
denmark <- st_read("DK_map.shp")
plot(st_geometry(denmark))

# Ratio of the `over70` column to `population`, then a map of that ratio.
denmark$proportion.over.70 <- denmark$over70 / denmark$population
plot(
  denmark["proportion.over.70"],
  main = "Proportion of population aged 70 years and above"
)
# With ggplot: fill each feature by the `proportion.over.70` column.
denmark %>%
  ggplot(aes(fill = proportion.over.70)) +
  geom_sf()